Loading packages

# NetCDF libraries
library(ncdf4)

# to use the index function
library(zoo)

# spatial
library(sp)

library(dplyr)
library(tidyverse)

library(spacetime)
library(trajectories)

# To plot the track of the glider
library(leaflet)

# writing geoJOSN
library(geojsonio)
library(geojson)

Providing sample NetCFD files and the path to the output GeoJSON files

Here, the directory in which the the netCDF files exist on disk should be provided. Additionally, the file name should also be provided in addition to the output directory for saving the GeoJSON file that has the extracted metadata.

# This is where the netCDF data files reside
files_Directory = "/home/fadi/DataX1/University/WWU/WWU 5/netCDF sample files/"
# This is the files that needs to be read
file_Name = "amerigo_coconet_R.nc"
# This is the complete path to the netCDF file that is being read
file_Path = paste0(files_Directory,file_Name)
# Reading the file
file = nc_open(file_Path)
# Providing the GeoJSON output directory
geoJSONOutput = "/home/fadi/DataX1/University/WWU/WWU 5/Task 2/GeoJson/geoJSON Output/"

1. Getting variabels names and dimensions

Getting the variabels available in the file

#Getting the names of variables in the netCDF file
variabels = names(file$var) # this is of type vector
#Getting the number of variables in the netCDF file
number_of_variables = length(variabels)

Getting the dimensions available in the file

#Getting the names of dimensions in the netCDF file
dimensions = names(file$dim) # this is of type vector
#Getting the number of dimensions in the netCDF file
number_of_dimensions = length(dimensions)

2. Formatiing time and getting coordinates

Writing a function to get the Time dimenson from the glider netCDF file

The following function extracts the time dimension from the glider netCDF file and converts it from double to Unix time. The function also extracts the time stamp and the date as well for further processing. This function accpets a ncdf4 object as an argument and returns a dataframe that has the formatted time in addition the timestamp and the date.

formatTimeDim <- function(file){
    tryCatch(
        expr = {
            # Getting the time dimension
            time = file$dim$TIME
            # Formatting time
            time_formatted = as.POSIXct(time$vals, origin="1970-01-01")
            # Extracting the time stamp from the unix time object
            timeStamp = format(time_formatted,'%H:%M:%S')
            # Extracting the date from the unix time object
            date = as.Date(time_formatted)
            # Combining the formatted time, the time stamp and the date in one data frame
            all = cbind.data.frame(time_formatted, timeStamp, date)
            
            message('The time dimension has been successfully formatted!')
          # returning the dataframe
          return(all)
            
        },
        error = function(e){
            message('Caught an error while formatting time!')
            print(e)
        },
        warning = function(w){
            message('Caught an warning while formatting time!')
            print(w)
        }
    )    
}

Combining lat and long with time

This function returns a dataframe that has the mission’s coordinate variables in addition to the formatted time dimension, time stamp and date. The NA data is removed from the resulting dataframe.

combineLatLongWithTime <- function(filePath){
    tryCatch(
        expr = {
              # Reading the file
              file = nc_open(filePath)
              # Getting the latitude and longitude
              lon = ncvar_get(file,"LONGITUDE")
              lat = ncvar_get(file,"LATITUDE")
              # Calling the time function to get the dataframe that has the time formatted
              time = formatTimeDim(file)
              # Combining the time data frame with lat and long. This dataframe has NA values
              dataframe = cbind.data.frame(lat, lon, time)
              # removing NA values
              dataframe_No_NA = dataframe %>% drop_na()
              
              return(dataframe_No_NA)
            
            message("Time successfully combined with Lat and Long!")
        },
        error = function(e){
            message('Caught an error while combining time with Lat and Long!')
            print(e)
        },
        warning = function(w){
            message('Caught an warning while combining time with Lat and Long!')
            print(w)
        }
    )    
}

3. Creating the mission track

A function to get the track of the netCDF file

This function creates a Track object of the library trajectories. It takes the a dataframe that has lat, lon, time_formatted, time stamp and date as an argument which is the output of the function combineLatLongWithTime and returns the Track object.

getMissionTrack = function(lat_lon_time_No_NA){
  tryCatch(
        expr = {
              # Setting the reference system
              crs = CRS("+proj=longlat +datum=WGS84")
              # Creating a spatial points object
              sp = SpatialPoints(lat_lon_time_No_NA %>% select(1:2),crs)
              # Getting time
              time = lat_lon_time_No_NA$time_formatted
              # Providing the mission's coordinates
              data = data.frame(lat_lon_time_No_NA %>% select(1:2))
              # Creating an STIDF object
              stidf = STIDF(sp, time, data)
              # Creating a track object
              gliderTrack = Track(stidf)
              # Printing a confirmation message in console
              message("Mission track object successfully created!")
              # Returning a track object
              return(gliderTrack)
        },
        error = function(e){
            message('Caught an error while creating mission track object!')
            print(e)
        },
        warning = function(w){
            message('Caught an warning while while creating mission track object!')
            print(w)
        }
    )
}

A function to get the first value of each day in the dataframe

This function also takes a dataframe that has lat, lon, time_fomratted, timestamp and date as an input which is the output of the function combineLatLongWithTime. The first measurement of each day of the glider’s mission is extracted with the coordinates and the time stamp. This will be used to add labels to the visual map representation of the generalized mission track. The purpose is to make the mission’s track more comprehendable by providing a sense of direction through those labels.

getMissionTrackLabels = function(lat_lon_time_No_NA){
    tryCatch(
        expr = {
              # Splitting the dataframe based on date
              # This results in a list of dataframes
              list = split(lat_lon_time_No_NA, lat_lon_time_No_NA$date)
              #This provides the first row of each day in the data frame
              first_days = do.call(rbind, (lapply(list, function(x) x[1,])))
              # Printing a confirmation message in console
              message("Mission track labels successfully created!")
              # Returning the labels list
              return(first_days)     
        },
        error = function(e){
            message('Caught an error while creating mission track labels!')
            print(e)
        },
        warning = function(w){
            message('Caught an warning while while creating mission track labels!')
            print(w)
        }
    )
}

Generalizing the mission track

This function generalizes the mission’s Track abject using the generalize function available in the trjectories library. This function returns a matrix containing the generalized track’s coordinates.

generalizeMissionTrack = function(missionTrack){
  tryCatch(
        expr = {
            # Generalizing the mission track
            generalizedTrack = generalize(missionTrack, distance = 2000, tol = 0.006)
            # Getting the coordinates of the generalized track
            generalizedTrackCoordinates = coordinates(generalizedTrack)
            message('Mission track is successfully generalized!')
            return(generalizedTrackCoordinates)     
        },
        error = function(e){
            message('Caught an error while generalizing the mission track!')
            print(e)
        },
        warning = function(w){
            message('Caught an warning while while generalizing the mission track!')
            print(w)
        }
    )
}

4. Plotting Tracks

This section focues on plotting the missions’ tracks before and after the generalization. This section is only meant for exploring the generalization process and is not part of the metadata that needs to be transported.

A function to plot the track of the netCDF file

This function takes the output of the function getMissionTrack and plots it on a leaflet map in R.

plotMissionTrack = function(gliderTrack){
  # plotting the map
   return(leaflet() %>%addTiles() %>% addPolylines(lat = gliderTrack@data[,1], lng = gliderTrack@data[,2]))
}

A function to plot the mission track with labels

This function plots the tracks with lables.

plotMissionTrackWithLabels = function(lat_lon_time_No_NA, gliderTrack){
  track = plotMissionTrack(gliderTrack)
  first_days = getMissionTrackLabels(lat_lon_time_No_NA)
  return (track %>% addAwesomeMarkers(data = first_days, lat = ~first_days$lat, lng = ~first_days$lon, label = first_days$time_formatted))
}

5. Saving metadata to a GeoJSON file

A function to convert coordinates from a martix or a dataframe to a numeric list

The following function converts the coordinates from a matrix to a numeric list so the geometry of the Feature saved in the GeojSON file can be properly represented.

coordsToNumericList = function(matrix){
  tryCatch(
        expr = {
            list = list()
            for(i in 1:length(matrix[,1])){
              list[[i]]= c(matrix[[i,1]], matrix[[i,2]])
            }
            message('The generalized track coordinates successfully converted a numeric list!')
            return(list)    
        },
        error = function(e){
            message('Caught an error while creating mission track labels!')
            print(e)
        },
        warning = function(w){
            message('Caught an warning while while creating mission track labels!')
            print(w)
        }
    )
}

Creating the GoeJSON file

Geometry is the generalized track coordinates saved in a matrix or a dataframe properties is a named list that has all the non spatial properties

Creating a linestring out of the generalized track

The following function creates a LineString out of the mission’s track object. The geometry is the coordinates of the generalized track object which should be converted to a numeric list using the coordsToNumericList function. The properties are a named list that has all the non spatial properties that are to be associate with the generalized track.

createLineString = function(geometry, properties){
  
  tryCatch(
        expr = {
            # Converting the coordinates from a dataframe to a list
            geometryList = coordsToNumericList(geometry)
            # Creating a list that contains the necessary information to create a linestring
            # Json object which will be used to create the linestring
            lineStringInfo = list(type = "LineString",coordinates = geometryList)
            # Converting the linestring info to JSON
            lineStringInfoJSON = rjson::toJSON(lineStringInfo)
            # Creating a linestring object out of the lineStringInfoJSON
            trackLineString = linestring(lineStringInfoJSON)
            # Creating the gosJSON StringLine object
            trackAsFeature = trackLineString %>% feature()  %>%  properties_add( .list = properties)

            message('The generalized track has been sucessfully created as a lineString!')
            
            return(trackAsFeature)
        },
        error = function(e){
            message('Caught an error while creating a linestring out of the generalized object!')
            print(e)
        },
        warning = function(w){
            message('Caught an warning while creating a linestring out of the generalized object!')
            print(w)
        }
    )
}

Creating MultiPoints out of the Track Labels

Goemetry is the coordinates of the first measurement of each day obtained from the function getMissionTrackLabels

The following function creates MutiPoint out of the mission’s track labels. The geometry is the coordinates of the generalized track object which should be converted to a numeric list using the coordsToNumericList function. The properties are a named list containing the time stamps of the lables.

createMultiPoint = function(geometry, properties){
  
  tryCatch(
        expr = {
            # Converting the coordinates from a dataframe to a list
            geometryListLabels = coordsToNumericList(geometry)
            # Creating a list that contains the necessary information to create a linestring
            # Json object which will be used to create the linestring
            MultipointsInfo = list(type = "MultiPoint",coordinates = geometryListLabels)
            # Converting the linestring info to JSON
            MultipointsInfoJSON = rjson::toJSON(MultipointsInfo)
            # Creating a linestring object out of the lineStringInfoJSON
            labelsMultipoint = multipoint(MultipointsInfoJSON)
            # Creating the gosJSON MultiPoint object
            labelsAsFeature = labelsMultipoint %>% feature()  %>%  properties_add( .list = properties)

            message('The mission track Labels sucessfully created as MultiPoint!')
            
            return(labelsAsFeature)
        },
        error = function(e){
            message('Caught an error while creating MultiPoint out of mission lables!')
            print(e)
        },
        warning = function(w){
            message('Caught an warning while creating while creating MultiPoint out of mission lables!')
            print(w)
        }
    )
}

Testing

Maybe this should be encapsulated into one function in the end

# Formatting time and getting lat and lon in one dataframe
lat_lon_time = combineLatLongWithTime(file_Path)
## The time dimension has been successfully formatted!
# Getting the mission track
missionTrack = getMissionTrack(lat_lon_time)
## Mission track object successfully created!
# Generalizing the track
generalizedTrack = generalizeMissionTrack(missionTrack)
## Mission track is successfully generalized!
# Getting the mission track labels
missionTrackLabels = getMissionTrackLabels(lat_lon_time)
## Mission track labels successfully created!

Plotting the tracks

Plotting the original track

# Plotting the original track without labels
plotMissionTrack(missionTrack)
# Plotting the original track with labels
plotMissionTrackWithLabels(lat_lon_time, missionTrack)
## Mission track labels successfully created!

Plotting the generalized track

generalizedTrackObject = generalize(missionTrack, distance = 2000, tol = 0.006)
# Plotting the original track without labels
plotMissionTrack(generalizedTrackObject)
# Plotting the original track with labels
plotMissionTrackWithLabels(lat_lon_time, generalizedTrackObject)
## Mission track labels successfully created!

Getting mission track information

I did not encapsulte this in a function because adding the non-spatial properties with nested lists does not work well.

# Getting the bounding box
bboxOriginal = missionTrack@sp@bbox
bbox = list(bboxOriginal[1,1], bboxOriginal[1,2], bboxOriginal[2,1], bboxOriginal[1,2])
# Getting the length of the track in KM
length = sum(missionTrack@connections$distance) /1000
# Getting the number of points the track has
numberOfPoints = length(missionTrack@sp)
# Getting the time period
ix = index(missionTrack@time)
tmin = min(ix)
tmax = max(ix)
timePeriod = paste0("[",tmin," , " ,tmax,"]")

Creating the non spatial properties

The mission track non-spatial properties

Extra non spatial properties can be added to this list

# Getting the mission non spatial properties
propertiesLineString = list("variabels names" = variabels, "number of variables"= number_of_variables,"dimensions names" = dimensions, "bbox" = bbox, "Track Length" = length, "Number of Points" = numberOfPoints, "Time Period" = timePeriod)

Getting the timestamps of the mission track labels

it is simply the third column of the track labels dataframe

propertiesMultiPoint = list("Time Stamps" = missionTrackLabels[,3])

Creating a featurecollection of the generalized track and the labels

# Creating a LineString object out of the generalized track
lineString = createLineString(generalizedTrack, propertiesLineString)
## The generalized track coordinates successfully converted a numeric list!
## Registered S3 method overwritten by 'geojsonlint':
##   method         from 
##   print.location dplyr
## The generalized track has been sucessfully created as a lineString!
# Creating a MultiPoint object out of the track labels
multiPoint = createMultiPoint(missionTrackLabels, propertiesMultiPoint)
## The generalized track coordinates successfully converted a numeric list!
## The mission track Labels sucessfully created as MultiPoint!
# Putting the LineString and the MultiPoint in a list
featureCollList = list(lineString, multiPoint)
# Creating the feature collection that will be saved to the geoJSON file
featureColl = featurecollection(featureCollList)

A function to save the metadata to a GeoJSON file

writeGeoJSON <- function(x, file) {
tryCatch(
        expr = {
            if (inherits(file, "file")) on.exit(close(file)) 
            cat(
              geo_pretty(x),
              "\n",
              file = file
            )
        },
        error = function(e){
            message('Caught an error while writing the GeoJSON file to disk!')
            print(e)
        },
        warning = function(w){
            message('Caught an warning while writing the GeoJSON file to disk!')
            print(w)
        }
    )
}

Saving the geoJSON file to disk

# Creating a name for the output file
outputFile = paste0(geoJSONOutput ,file_Name, "_metadata.goejson")
# writing the goeJSON file to disk
writeGeoJSON(featureColl, file = outputFile)